library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
test_data = read_csv('data/test_data.csv', show_col_types = FALSE)
test_df =
test_data %>%
select(phenotype, beta, p, description, group) %>%
arrange(group, phenotype) %>%
mutate(
logp = -log10(test_data$p),
direction = ifelse(test_data$beta >= 0, "Positive", "Negative"),
xpos = row_number()) %>%
na.omit()
colnames(test_data)
## [1] "phenotype" "snp" "adjustment" "beta" "OR"
## [6] "SE" "p" "type" "n_total" "n_cases"
## [11] "n_controls" "HWE_p.min" "allele_freq" "n_no_snp" "k_studies"
## [16] "tau2" "I2.percent" "Q" "Q.df" "Q.p"
## [21] "beta.fixed" "OR.fixed" "SE.fixed" "p.fixed" "beta.random"
## [26] "OR.random" "SE.random" "p.random" "description" "group"
table_cols = c( "phenotype", "description", "group", "snp", "beta","OR", "SE","p",
"n_total", "n_cases", "n_controls", "allele_freq" )
test_data %>%
select(table_cols) %>%
arrange(p) %>%
head(5) %>%
knitr::kable(
col.names = c('Phenocode', 'Phenocode', 'Group', 'SNP', 'Beta', ' OR', 'SE', 'P-value',
'Sample Size', 'Number of Cases', 'Number of Controls','Allele Frequency')
)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(table_cols)
##
## # Now:
## data %>% select(all_of(table_cols))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
| 727.6 |
Rupture of tendon, nontraumatic |
musculoskeletal |
1:198394253_T |
-0.0544859 |
0.9469718 |
0.0121102 |
0.0000068 |
1199195 |
22957 |
1176238 |
0.7846658 |
| 591.0 |
Urinary tract infection |
genitourinary |
1:198394253_T |
-0.0260412 |
0.9742949 |
0.0058566 |
0.0000087 |
1240410 |
113316 |
1127094 |
0.7837394 |
| 727.0 |
Other disorders of synovium, tendon, and bursa |
musculoskeletal |
1:198394253_T |
-0.0229708 |
0.9772910 |
0.0055977 |
0.0000407 |
1200348 |
122649 |
1077699 |
0.7833641 |
| 242.0 |
Thyrotoxicosis with or without goiter |
endocrine/metabolic |
1:198394253_T |
0.0455155 |
1.0465673 |
0.0135011 |
0.0007483 |
1271439 |
17796 |
1253643 |
0.7840630 |
| 244.4 |
Hypothyroidism NOS |
endocrine/metabolic |
1:198394253_T |
0.0196217 |
1.0198155 |
0.0059710 |
0.0010156 |
1318612 |
130098 |
1188514 |
0.7828356 |
# establish signficance threshold
n_tests <- nrow(test_df)
p_thresh <- 0.05 / n_tests
logp_thresh <- -log10(p_thresh)
cat_ticks =
test_df %>%
group_by(group) %>%
summarize(midpoint = mean(xpos))
# ---- Compute category boundaries & midpoints ----
cat_bounds <- test_df %>%
group_by(group) %>%
summarize(start = min(xpos) - 0.5, end = max(xpos) + 0.5, midpoint = mean(xpos)) %>%
ungroup()
# ---- Create alternating shaded rectangles ----
shapes <- list(
# Bonferroni line
list(
type = "line",
x0 = 0,
x1 = 1,
xref = "paper",
y0 = logp_thresh,
y1 = logp_thresh,
line = list(color = "red", dash = "dash")
)
)
# Add shaded rectangles per category (alternate gray/transparent)
for (i in seq_len(nrow(cat_bounds))) {
if (i %% 2 == 1) { # shade every other category
shapes <- append(shapes, list(
list(
type = "rect",
xref = "x",
yref = "paper",
x0 = cat_bounds$start[i],
x1 = cat_bounds$end[i],
y0 = 0,
y1 = 1,
fillcolor = "lightgray",
opacity = 0.2,
line = list(width = 0)
)
))
}
}
interactive_plot = plot_ly(
data = test_df,
x = ~xpos,
y = ~logp,
type = "scatter",
mode = "markers",
color = ~group,
symbol = ~direction, # shape by sign of beta
symbols = c("triangle-down", "triangle-up"), # map Negative/Positive
text = ~paste("Phenotype:", description,
"<br>Phenocode:", phenotype,
"<br>Group:", group,
"<br>P-value:", signif(p, 3),
"<br>Beta:", signif(beta, 3)),
hoverinfo = "text",
showlegend = FALSE
) %>%
layout(
title = "Interactive PheWAS Manhattan Plot",
xaxis = list(
title = "Phenotype",
type = "linear",
tickvals = cat_ticks$midpoint,
ticktext = cat_ticks$group
),
yaxis = list(title = "-log10(P-value)"),
shapes = shapes
)
interactive_plot
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors